#!/bin/bash
#
# Prepares the files under ${BASE_DATA_PATH}/3_normed_matrix: the integer id
# maps for genes and genomes, the re-keyed matrix, and the per-cutoff matrices.
#
# The helper scripts are referenced by relative path (3_normed_matrix/...),
# so run this from the directory that contains 3_normed_matrix/.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails, so that a missing or corrupt input cannot
# silently produce empty map/matrix files that later stages would consume.
set -euo pipefail

# Root of the data tree; exported so child processes can see it as well.
export BASE_DATA_PATH=/home/mixagol/data

# Every output of this script lands here; create it if it is missing.
mkdir -p "${BASE_DATA_PATH}/3_normed_matrix"

#######################################
# Write an "<id><TAB><name>" map for the first column of a gzipped,
# tab-separated file. Ids start at 0 (nl -v0) and are printed left-justified
# without padding (nl -n ln -w1). With nl's default body numbering, empty
# lines are passed through without receiving an id.
# Arguments: $1 - gzipped input file
#            $2 - output map file (overwritten)
# Returns:   non-zero if any stage of the pipeline fails
#######################################
build_id_map() {
    local src=$1
    local dst=$2
    zcat "$src" | cut -f1 | nl -v0 -n ln -w1 > "$dst"
}

build_id_map "${BASE_DATA_PATH}/1_databases/genes.txt.gz" \
    "${BASE_DATA_PATH}/3_normed_matrix/map_int_gene.txt"
build_id_map "${BASE_DATA_PATH}/1_databases/genomes.txt.gz" \
    "${BASE_DATA_PATH}/3_normed_matrix/map_int_genom.txt"

# Re-key the raw results through the integer id maps and store the result
# gzipped as matrix.txt.gz.
#
# Only columns 1, 2 and 11 of the raw file are kept before being handed to
# apply_maps.py (NOTE(review): column 11 is presumably the e-value that the
# cutoff loop later filters on -- confirm against apply_maps.py's output).
#
# results.txt is the only input here without a .gz suffix. `zcat -f` still
# decompresses it if it really is gzip data, and passes it through unchanged
# if it is plain text, instead of failing with "not in gzip format".
zcat -f "${BASE_DATA_PATH}/2_raw_matrix/results.txt" \
    | cut -f1,2,11 \
    | 3_normed_matrix/apply_maps.py \
            --genes-map "${BASE_DATA_PATH}/3_normed_matrix/map_int_gene.txt" \
            --genomes-map "${BASE_DATA_PATH}/3_normed_matrix/map_int_genom.txt" \
    | gzip \
    > "${BASE_DATA_PATH}/3_normed_matrix/matrix.txt.gz"

# For each cutoff, keep the rows of matrix.txt.gz whose third column is <= the
# cutoff, pass their first two columns (tab-separated) to create_matrix.py and
# store its output as matrix_<cutoff>_.txt (the trailing underscore is part of
# the established file name and is kept as is).
e_value_cutoffs=(
    1e+01 1e+00 1e-01 5e-02 1e-02 1e-03 1e-04 1e-05 1e-06 1e-07 1e-08
    1e-09 1e-10 1e-11 1e-12 1e-13 1e-14 1e-15 1e-20 1e-30 1e-40 1e-50
)

for E_VALUE in "${e_value_cutoffs[@]}"; do
    # The cutoff is handed to awk with -v rather than spliced into the program
    # text. `cutoff += 0` turns it into a plain number, so `$3 <= cutoff`
    # compares exactly as it would against a numeric literal in the program.
    zcat "${BASE_DATA_PATH}/3_normed_matrix/matrix.txt.gz" \
        | awk -F'\t' -v cutoff="$E_VALUE" '
            BEGIN { OFS = "\t"; cutoff += 0 }
            $3 <= cutoff { print $1, $2 }' \
        | 3_normed_matrix/create_matrix.py \
        > "${BASE_DATA_PATH}/3_normed_matrix/matrix_${E_VALUE}_.txt"
done


#export GENOMES_NUM=`cat "${BASE_DATA_PATH}/3_normed_matrix/map_int_genom.txt" | wc -l`

#g++ -O3 3_normed_matrix/pre_calc_intersections.cpp -o 3_normed_matrix/pre_calc_intersections
#
#3_normed_matrix/run_distmi_cluster.sh $GENOMES_NUM /home/mixagol/work/data/dist_mutual_information_${GENOMES_NUM}
#ls /home/mixagol/work/data/dist_mutual_information_${GENOMES_NUM}/* \
#    | xargs -n1 cat \
#    | sort -t$'\t' -n -k1,1 -k2,2 \
#    | uniq \
#    > ${BASE_DATA_PATH}/3_normed_matrix/dist_mutual_information.txt
#rm -rf /home/mixagol/work/data/dist_mutual_information_${GENOMES_NUM}/*
